# Code to replicate the descriptive analyses in the appendix




##########################
# load data and packages #
##########################

library(stargazer)
library(readstata13)
library(ggplot2)

sessionInfo() # last run
#R version 4.4.3 (2025-02-28 ucrt)
#Platform: x86_64-w64-mingw32/x64
#Running under: Windows 11 x64 (build 26100)
#Matrix products: default
#locale:
#[1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8    LC_MONETARY=English_United States.utf8
#[4] LC_NUMERIC=C                           LC_TIME=English_United States.utf8    
#time zone: Asia/Taipei
#tzcode source: internal
#attached base packages:
#[1] stats     graphics  grDevices utils     datasets  methods   base     
#other attached packages:
#[1] ggplot2_3.5.1      stargazer_5.2.3    readstata13_0.10.1
#loaded via a namespace (and not attached):
#[1] vctrs_0.6.5       zip_2.3.2         cli_3.6.4         rlang_1.1.5       stringi_1.8.7     generics_0.1.3   
#[7] glue_1.8.0        colorspace_2.1-1  plyr_1.8.9        scales_1.3.0      grid_4.4.3        munsell_0.5.1    
#[13] tibble_3.2.1      openxlsx_4.2.8    lifecycle_1.0.4   compiler_4.4.3    dplyr_1.1.4       Rcpp_1.0.14      
#[19] pkgconfig_2.0.3   rstudioapi_0.17.1 R6_2.6.1          tidyselect_1.2.1  pillar_1.10.1     magrittr_2.0.3   
#[25] tools_4.4.3       withr_3.0.2       gtable_0.3.6 

# Main data set, stored in Stata format and read with readstata13.
df <- read.dta13("MainCandidateData.dta")

# load() restores saved objects into the workspace under their stored names,
# so nothing here shows which objects the file provides. The Table A.1 code
# reads `fps_count`; stop with a clear message if it is still undefined
# instead of failing later with "object 'fps_count' not found".
load("FirstPersonPronounsCount.RData")
if (!exists("fps_count")) {
  stop("'fps_count' is not defined after loading FirstPersonPronounsCount.RData",
       call. = FALSE)
}




##############################################################
# Appendix A Table A.1 (proportion of first person pronouns) #
##############################################################

# One row per column of fps_count: the column name and its column total.
pps <- data.frame(
  word = colnames(fps_count),
  count = colSums(fps_count),
  row.names = NULL
)

# Turn the totals into shares of the overall count, put the largest share
# first, then keep four decimal places for the table.
pps[["prop"]] <- pps[["count"]] / sum(pps[["count"]])
pps <- pps[order(pps[["prop"]], decreasing = TRUE), ]
pps[["prop"]] <- round(pps[["prop"]], digits = 4)

# t() applied to a data frame returns a matrix, so the double transpose hands
# stargazer a matrix of the two columns rather than a data frame.
# NOTE(review): presumably so the rows are printed as they are instead of as
# summary statistics -- confirm against the stargazer documentation.
stargazer(t(t(pps[c("word", "prop")])))




##################################
# Appendix C (descriptive stats) #
##################################

# Table C.1: summary statistics for the two outcome variables, the three
# incentive measures, and the candidate- and district-level covariates.
sub <- df[c("log_personalpronoun_name_prop2", "log_partyname_prop2",
            "Iadjust", "C_M", "C_Padjust",
            "gender", "age", "incumbent", "log_totwin", "dualnominate",
            "log_distpopulation", "distpop65", "log_distprimary")]

# Strip underscores from the column names before they reach stargazer.
# NOTE(review): presumably because "_" is special in LaTeX output -- confirm.
names(sub) <- gsub("_", "", names(sub))

# covariate.labels are matched to the columns of `sub` by position, so the two
# lists must stay in the same order (13 columns, 13 labels).
stargazer(
  sub,
  covariate.labels = c("References to Self (log)",
                       "References to Parties (log)",
                       "I'", "C/M", "C/P_t-1",
                       "Female", "Age", "Incumbent",
                       "Tenure (log)", "Dual Nomination",
                       "District Population (log)",
                       "District Age above 65 Proportion",
                       "District Primary Industry Proportion (log)"),
  omit.summary.stat = c("p25", "p75"),
  digits = 2,
  font.size = "footnotesize"
)


# Table C.2: pairwise correlations among the three incentive measures, each
# computed on the observations where both variables of the pair are present.
round(
  cor(df[c("Iadjust", "C_M", "C_Padjust")], use = "pairwise.complete.obs"),
  digits = 2
)




######################################
# Appendix G Figure G.1 (simulation) #
######################################

# Effective number of candidates: the inverse of the sum of squared entries,
# 1 / sum(vec^2).
#
# vec: numeric vector (the simulation passes shares that sum to 1)
# Returns a single number. k equal shares of 1/k give k. An empty vector
# gives Inf, because the sum of squares is 0.
effnumc <- function(vec) {
  # Vectorised sum of squares; an index loop written as 1:length(vec) would
  # visit the indices 1 and 0 when vec is empty.
  1 / sum(vec^2)
}

# I' = (C / P) * (E' / P), where E' = effnumc(E).
#
# C, P: numeric
# E:    vector that effnumc() collapses into a single number
# Note: this definition shares its name with base::I(), so code that looks up
# `I` starting from the global environment finds this function first.
I <- function(C, P, E) {
  eff_candidates <- effnumc(E)
  (C / P) * (eff_candidates / P)
}

# Ratio C / M.
coverm <- function(C, M) {
  C / M
}

# Ratio C / P.
coverp <- function(C, P) {
  C / P
}

# TRUE where x lies within `tol` of its nearest integer (element-wise).
# Unlike is.integer(), this tests the value rather than the storage type.
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  distance_to_integer <- abs(x - round(x))
  distance_to_integer < tol
}

# C = number of copartisan candidates
# P = party magnitude (t-1): the number of candidates they think they can win
# E = effective number of candidates within a party (t-1)

# Draw C uniform(0, 1) numbers and rescale them so that they sum to 1.
#
# C: how many shares to draw
# Returns a numeric vector of length C.
esim <- function(C) {
  draws <- runif(C, min = 0, max = 1)
  draws / sum(draws)
}

# Simulate x districts and compare I' with C/M and C/P in each of them.
#
# Per district, drawn in this order (the order determines how the random
# number stream is consumed):
#   M    one value from 3..20
#   C    one value from 1..round(1.1 * M), so C may exceed M
#   P    one value from 1..min(C, M)
#   vec  C random shares from esim()
#
# x: number of districts to simulate
# Returns a data frame with one row per district and the columns
#   C, P, M,
#   I        I(C, P, vec) rounded to 4 decimals
#   C/M, C/P
#   E        effnumc(vec)
#   eminusp  E - P
#   diff1    |C/M - I|
#   diff2    |C/P - I|
# x = 0 gives a data frame with zero rows.
districtsim1 <- function(x) {
  cols <- c("C", "P", "M", "I", "C/M", "C/P", "E", "eminusp", "diff1", "diff2")

  # Fill a preallocated matrix: appending with rbind() inside the loop copies
  # all earlier rows on every iteration.
  draws <- matrix(NA_real_, nrow = x, ncol = length(cols))

  # seq_len() makes x = 0 an empty loop; 1:x would run for i = 1 and i = 0.
  for (i in seq_len(x)) {
    M <- sample(3:20, 1)
    C <- sample(1:round(1.1 * M), 1)
    # P can exceed neither the number of candidates nor M
    P <- sample(1:min(C, M), 1)

    vec <- esim(C)
    E <- effnumc(vec)
    ivalue <- round(I(C, P, vec), 4)
    cmval <- coverm(C, M)
    cpval <- coverp(C, P)

    draws[i, ] <- c(C, P, M, ivalue, cmval, cpval, E, E - P,
                    abs(cmval - ivalue), abs(cpval - ivalue))
  }

  alldistricts <- as.data.frame(draws)
  colnames(alldistricts) <- cols
  alldistricts
}

# The simulation draws random numbers. Fixing the seed makes the simulated
# districts, and therefore the figure, identical on every run. The seed value
# itself is arbitrary.
set.seed(20250228)

# NOTE: this data frame shares its name with base::sample(). Calls such as
# sample(3:20, 1) still reach the base function, because R skips non-function
# objects when it resolves a function call.
sample <- districtsim1(10000)

colnames(sample)

# Extra derived columns; the figure below does not use them.
sample$CminusP <- sample$C - sample$P
sample$PminusE <- sample$P - sample$E
sample$`E/P` <- sample$E / sample$P

# Stack the two absolute differences into long format: one row per district
# and comparison, labelled by `Type`.
sample2_1 <- sample[, c("M", "diff1")]
colnames(sample2_1)[2] <- "diff"
sample2_1$Type <- "I' vs. C/M"
sample2_2 <- sample[, c("M", "diff2")]
colnames(sample2_2)[2] <- "diff"
sample2_2$Type <- "I' vs. C/P"
sample2 <- rbind(sample2_1, sample2_2)

# Loess-smoothed absolute difference against M, one curve per comparison,
# with 99% confidence bands and a dashed reference line at zero.
figure_g1 <- ggplot(data = sample2, aes(x = M, y = diff, colour = Type)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_smooth(method = "loess", level = 0.99, lwd = 1.5) +
  ylab("Absolute Difference between\nI' and C/M or C/P") +
  theme_bw() +
  theme(axis.title.x = element_text(size = 15),
        axis.title.y = element_text(size = 15))
figure_g1

# Pass the plot explicitly so the saved file does not depend on which plot
# happened to be created last.
ggsave("FigureG.1.pdf", plot = figure_g1, width = 10, height = 6)




###############################################################
# Appendix G Table G.2 (model comparisons C/M vs. C/P vs. I') #
###############################################################

# Keep only rows that are complete on every variable used by any of the six
# models below. All models are then fitted to the same observations, and each
# residual vector lines up row by row with dfsub.
dfsub <- na.omit(df[, c("log_personalpronoun_name_prop2", "log_partyname_prop2",
                        "Iadjust", "C_P", "C_M",
                        "gender", "age", "incumbent", "log_totwin",
                        "log_distpopulation", "distpop65", "log_distprimary")])

# Outcome 1: log_personalpronoun_name_prop2. The three models differ only in
# the incentive measure (C_M, C_P, Iadjust).
cm1 <- lm(log_personalpronoun_name_prop2 ~ C_M
          + gender + age + incumbent + log_totwin
          + log_distpopulation + distpop65 + log_distprimary,
          data = dfsub)

cp1 <- lm(log_personalpronoun_name_prop2 ~ C_P
          + gender + age + incumbent + log_totwin
          + log_distpopulation + distpop65 + log_distprimary,
          data = dfsub)
# using C_Padjust leads to the same results

i1 <- lm(log_personalpronoun_name_prop2 ~ Iadjust
         + gender + age + incumbent + log_totwin
         + log_distpopulation + distpop65 + log_distprimary,
         data = dfsub)

# Outcome 2: log_partyname_prop2, same three specifications.
cm2 <- lm(log_partyname_prop2 ~ C_M
          + gender + age + incumbent + log_totwin
          + log_distpopulation + distpop65 + log_distprimary,
          data = dfsub)

cp2 <- lm(log_partyname_prop2 ~ C_P
          + gender + age + incumbent + log_totwin
          + log_distpopulation + distpop65 + log_distprimary,
          data = dfsub)
# using C_Padjust leads to the same results

i2 <- lm(log_partyname_prop2 ~ Iadjust
         + gender + age + incumbent + log_totwin
         + log_distpopulation + distpop65 + log_distprimary,
         data = dfsub)

# Absolute residual of every observation under each model. Squared residuals
# would rank the three models identically within an observation, so the
# counts below do not depend on that choice.
dfsub$cm1resid <- abs(residuals(cm1))
dfsub$cp1resid <- abs(residuals(cp1))
dfsub$i1resid <- abs(residuals(i1))

dfsub$cm2resid <- abs(residuals(cm2))
dfsub$cp2resid <- abs(residuals(cp2))
dfsub$i2resid <- abs(residuals(i2))

# Long-format copies of the absolute residuals, labelled by measure.
# They are not used by the table below.
resids1 <- data.frame(resid = c(dfsub$cm1resid,
                                dfsub$cp1resid,
                                dfsub$i1resid),
                      measure = rep(c("C/M", "C/P", "I'"), each = nrow(dfsub)))

resids2 <- data.frame(resid = c(dfsub$cm2resid,
                                dfsub$cp2resid,
                                dfsub$i2resid),
                      measure = rep(c("C/M", "C/P", "I'"), each = nrow(dfsub)))

# Number of observations for which each measure's model has the strictly
# smallest absolute residual. Exact ties are credited to no measure.
cm1op <- sum(dfsub$cm1resid < dfsub$cp1resid & dfsub$cm1resid < dfsub$i1resid)
cp1op <- sum(dfsub$cp1resid < dfsub$cm1resid & dfsub$cp1resid < dfsub$i1resid)
i1op <- sum(dfsub$i1resid < dfsub$cm1resid & dfsub$i1resid < dfsub$cp1resid)

cm2op <- sum(dfsub$cm2resid < dfsub$cp2resid & dfsub$cm2resid < dfsub$i2resid)
cp2op <- sum(dfsub$cp2resid < dfsub$cm2resid & dfsub$cp2resid < dfsub$i2resid)
i2op <- sum(dfsub$i2resid < dfsub$cm2resid & dfsub$i2resid < dfsub$cp2resid)

# Rows of the table: counts and shares for outcome 1, then counts and shares
# for outcome 2. Shares are relative to the sum of the three counts.
optab <- cbind(cm1op, cp1op, i1op)
optab <- rbind(optab, optab / sum(optab))
optab <- rbind(optab, c(cm2op, cp2op, i2op))
optab <- rbind(optab, optab[3, ] / sum(optab[3, ]))

optab <- as.data.frame(optab)
colnames(optab) <- c("C/M", "C/Pt−1", "I'")

# Row totals: the number of credited observations for count rows, 1 for
# share rows.
optab$total <- rowSums(optab)

# t() of a data frame returns a matrix, so the double transpose passes the
# table to stargazer as a matrix.
stargazer(t(t(optab)), digits = 2)
